package com.qigao.mall.commons.region;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * @author jx on 2019/1/3.
 * https://github.com/small-dream/China_Province_City
 */

public class RegionAreaProcess {
    // Scrapes the "administrative division codes at county level and above" pages
    // published on www.mca.gov.cn and assembles them into a
    // Province -> City -> Area tree. Each published page uses its own generated CSS
    // class names for the table cells, so every known publication (keyed by "yyyyMM")
    // is registered in historyMap together with the two class names it uses.

    private static final Logger logger= LoggerFactory.getLogger(RegionAreaProcess.class);
    private static final String AREAURL="AREAURL";
    private static final String PROCLASS="PROCLASS";
    private static final String AREACLASS="AREACLASS";

    /** Fallback page URL (the 2019-09 publication) used when no URL is supplied. */
    private static final String DEFAULT_AREA_URL="http://www.mca.gov.cn/article/sj/xzqh/2019/2019/201911051008.html";
    /** Fallback CSS class of the province / prefecture-level cells on the default page. */
    private static final String DEFAULT_PRO_CLASS="xl7027502";
    /** Fallback CSS class of the county / district cells on the default page. */
    private static final String DEFAULT_AREA_CLASS="xl7127502";
    /** Fallback publication year-month matching the default page. */
    private static final String DEFAULT_PUBLISH_YM="201909";

    /**
     * Known publications. Keys have the form {@code "<yyyyMM>-<AREAURL|PROCLASS|AREACLASS>"};
     * values are the page URL, the province/city cell class and the county cell class.
     */
    private static final Map<String,String> historyMap=new HashMap<>();
    static {
        historyMap.put("201909-"+AREAURL,"http://www.mca.gov.cn/article/sj/xzqh/2019/2019/201911051008.html");
        historyMap.put("201909-"+PROCLASS,"xl7027502");
        historyMap.put("201909-"+AREACLASS,"xl7127502");
        historyMap.put("201908-"+AREAURL,"http://www.mca.gov.cn/article/sj/tjyb/qgsj/2019/201909291543.html");
        historyMap.put("201908-"+PROCLASS,"xl7026399");
        historyMap.put("201908-"+AREACLASS,"xl7126399");
        historyMap.put("201907-"+AREAURL,"http://www.mca.gov.cn/article/sj/xzqh/2019/201908/201908271607.html");
        historyMap.put("201907-"+PROCLASS,"xl7010750");
        historyMap.put("201907-"+AREACLASS,"xl7110750");
        historyMap.put("201906-"+AREAURL,"http://www.mca.gov.cn/article/sj/xzqh/2019/201901-06/201908050812.html");
        historyMap.put("201906-"+PROCLASS,"xl7011159");
        historyMap.put("201906-"+AREACLASS,"xl7111159");
        historyMap.put("201905-"+AREAURL,"http://www.mca.gov.cn/article/sj/xzqh/2019/201901-06/201906211421.html");
        historyMap.put("201905-"+PROCLASS,"xl7428234");
        historyMap.put("201905-"+AREACLASS,"xl7528234");
        historyMap.put("201904-"+AREAURL,"http://www.mca.gov.cn/article/sj/xzqh/2019/201901-06/201905271424.html");
        historyMap.put("201904-"+PROCLASS,"xl7428234");
        historyMap.put("201904-"+AREACLASS,"xl7528234");
        historyMap.put("201903-"+AREAURL,"http://www.mca.gov.cn/article/sj/xzqh/2019/201901-06/201904301434.html");
        historyMap.put("201903-"+PROCLASS,"xl7428234");
        historyMap.put("201903-"+AREACLASS,"xl7528234");
        historyMap.put("201902-"+AREAURL,"http://www.mca.gov.cn/article/sj/xzqh/2019/201901-06/201903221437.html");
        historyMap.put("201902-"+PROCLASS,"xl7428234");
        historyMap.put("201902-"+AREACLASS,"xl7528234");
        historyMap.put("201901-"+AREAURL,"http://www.mca.gov.cn/article/sj/xzqh/2019/201901-06/201902061442.html");
        historyMap.put("201901-"+PROCLASS,"xl7428234");
        historyMap.put("201901-"+AREACLASS,"xl7528234");

        historyMap.put("202005-"+AREAURL,"http://www.mca.gov.cn/article/sj/xzqh/2020/2020/2020072805001.html");
        historyMap.put("202005-"+PROCLASS,"xl728204");
        historyMap.put("202005-"+AREACLASS,"xl738204");
    }


    /**
     * Command-line entry point: downloads the default (2019-09) publication and writes
     * the resulting province tree as JSON into the project directory.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            List<Province> provinceList = fetchProvinces(DEFAULT_AREA_URL, DEFAULT_PRO_CLASS, DEFAULT_AREA_CLASS, DEFAULT_PUBLISH_YM);
            String path = FileUtils.getProjectDir() + "/2019年9月中华人民共和国县以上行政区划代码" + ".json";
            JSONFormatUtils.jsonWriter(provinceList, path);
        } catch (IOException e) {
            logger.error("Failed to fetch or write the {} region data", DEFAULT_PUBLISH_YM, e);
        }
    }

    /**
     * Fetches the province tree for a registered publication.
     *
     * @param ym publication year-month, e.g. {@code "201909"}
     * @return the parsed provinces, or {@code null} when {@code ym} is not registered
     *         in the history map or the page could not be fetched/parsed
     */
    public static List<Province> getAreaList(String ym) {
        String urlStr=historyMap.get(ym+"-"+AREAURL);
        String priviceClassStr=historyMap.get(ym+"-"+PROCLASS);
        String areaClassStr=historyMap.get(ym+"-"+AREACLASS);
        if(urlStr==null || "".equals(urlStr)){
            logger.warn("{}抓取地址不存在，请到官方网站确认地址后，维护http://www.mca.gov.cn/article/sj/xzqh/",ym);
            return null;
        }
        return getAreaList(urlStr,priviceClassStr,areaClassStr,ym);
    }

    /**
     * Fetches the province tree from an explicit page. Every {@code null} argument
     * falls back to the corresponding value of the 2019-09 publication.
     *
     * @param urlStr          page URL, or {@code null} for the default page
     * @param priviceClassStr CSS class of the province / prefecture-level cells, or {@code null}
     * @param areaClassStr    CSS class of the county / district cells, or {@code null}
     * @param ym              publication year-month such as {@code "201909"}, or {@code null}
     * @return the parsed provinces, or {@code null} when fetching or parsing fails
     *         (the failure is logged)
     */
    public static List<Province> getAreaList(String urlStr,String priviceClassStr,String areaClassStr,String ym) {
        String areaUrl=urlStr==null?DEFAULT_AREA_URL:urlStr;
        String proAndCityClass=priviceClassStr==null?DEFAULT_PRO_CLASS:priviceClassStr;
        // Fixed: the fallback must be decided by areaClassStr itself, not by priviceClassStr.
        String areaClass=areaClassStr==null?DEFAULT_AREA_CLASS:areaClassStr;
        String publishYm=ym==null?DEFAULT_PUBLISH_YM:ym;
        try{
            return fetchProvinces(areaUrl,proAndCityClass,areaClass,publishYm);
        }catch (Exception e){
            // Callers rely on a null result for failure, so keep that contract but
            // record the cause instead of silently discarding it.
            logger.error("Failed to load region data for {} from {}",publishYm,areaUrl,e);
            return null;
        }
    }

    /**
     * Downloads one publication page, extracts the code/name cells and builds the tree.
     *
     * @param areaUrl         page URL
     * @param proAndCityClass CSS class of the province / prefecture-level cells
     * @param areaClass       CSS class of the county / district cells
     * @param publishYm       publication year-month stored on every province
     * @return the parsed provinces
     * @throws IOException           when the page cannot be downloaded
     * @throws IllegalStateException when the extracted cells do not pair up as code/name
     */
    private static List<Province> fetchProvinces(String areaUrl,String proAndCityClass,String areaClass,String publishYm) throws IOException {
        // maxBodySize(0) lifts jsoup's response size cap; the page is a very long table.
        Document doc = Jsoup.connect(areaUrl).maxBodySize(0).get();
        // Province / prefecture-level cells first, then county / district cells.
        List<String> cells = new ArrayList<String>(doc.getElementsByClass(proAndCityClass).eachText());
        cells.addAll(doc.getElementsByClass(areaClass).eachText());

        List<String> stringName = new ArrayList<String>();
        List<String> stringCode = new ArrayList<String>();
        // Cells alternate: even index = region code, odd index = region name.
        for (int i = 0; i < cells.size(); i++) {
            if (i % 2 == 0) {
                stringCode.add(cells.get(i));
            } else {
                stringName.add(cells.get(i));
            }
        }
        // Both lists must have the same size, otherwise codes and names are misaligned.
        logger.debug("stringName  size= {}   stringCode   size= {}", stringName.size(), stringCode.size());
        if (stringName.size() != stringCode.size()) {
            throw new IllegalStateException("数据错误");
        }
        return processData(stringName, stringCode, publishYm);
    }

    /**
     * Builds the province list from parallel name/code lists.
     * <p>
     * A code ending in {@code "0000"} starts a province. Codes sharing the province's
     * first two characters and ending in {@code "00"} become its cities, and codes
     * sharing a city's first four characters become that city's areas. For the four
     * municipalities (Beijing, Shanghai, Tianjin, Chongqing) a city with the
     * province's own name and code is added first, holding every other code with the
     * same two-character prefix.
     *
     * @param stringName region names, index-aligned with {@code stringCode}
     * @param stringCode region codes
     * @param publishYm  publication year-month stored on every province
     * @return provinces in the order their codes appear in {@code stringCode}
     */
    private static List<Province> processData(List<String> stringName, List<String> stringCode,String publishYm) {
        List<Province> provinceList = new ArrayList<Province>();
        int total = stringCode.size();
        for (int i = 0; i < total; i++) {
            String provinceName = stringName.get(i);
            String provinceCode = stringCode.get(i);
            if (!provinceCode.endsWith("0000")) {
                continue;
            }
            Province province = new Province();
            provinceList.add(province);
            province.setCode(provinceCode);
            province.setName(provinceName);
            province.setPublishYm(publishYm);
            logger.debug("省份:{}", provinceName);
            List<City> cities = new ArrayList<City>();
            province.setCityList(cities);

            // Loop-invariant: the two-character prefix shared by everything in this province.
            String provincePrefix = provinceCode.substring(0, 2);

            // Municipalities: the city carries the same name and code as the province.
            if (provinceName.contains("北京") || provinceName.contains("上海") || provinceName.contains("天津") || provinceName.contains("重庆")) {
                City city = new City();
                List<Area> areas = new ArrayList<Area>();
                city.setName(provinceName);
                city.setCode(provinceCode);
                city.setAreaList(areas);
                cities.add(city);
                logger.debug("直辖市：{}", provinceName);
                // Districts / counties of the municipality.
                for (int k = 0; k < total; k++) {
                    String areaName = stringName.get(k);
                    String areaCode = stringCode.get(k);
                    if (!provinceCode.equals(areaCode) && areaCode.startsWith(provincePrefix)) {
                        logger.debug("直辖市的区县：{}", areaName);
                        Area area = new Area();
                        area.setName(areaName);
                        area.setCode(areaCode);
                        areas.add(area);
                    }
                }
            }
            for (int j = 0; j < total; j++) {
                String cityName = stringName.get(j);
                String cityCode = stringCode.get(j);
                // Prefecture-level cities of this province.
                if (!cityCode.equals(provinceCode) && cityCode.startsWith(provincePrefix) && cityCode.endsWith("00")) {
                    logger.debug("地级市:{}", cityName);
                    City city = new City();
                    List<Area> areas = new ArrayList<Area>();
                    city.setName(cityName);
                    city.setCode(cityCode);
                    city.setAreaList(areas);
                    cities.add(city);
                    String cityPrefix = cityCode.substring(0, 4);
                    // Districts / counties of this city.
                    for (int k = 0; k < total; k++) {
                        String areaName = stringName.get(k);
                        String areaCode = stringCode.get(k);
                        if (!areaCode.equals(cityCode) && areaCode.startsWith(cityPrefix)) {
                            logger.debug("县区:{}", areaName);
                            Area area = new Area();
                            area.setName(areaName);
                            area.setCode(areaCode);
                            areas.add(area);
                        }
                    }
                }
            }
        }
        return provinceList;
    }
}
